Determining abundance of deer species in Victoria using camera trap data
Load in relevant packages for analysis, additionally, connect to the database. Camera trap and site data is stored on the database.
library(cmdstanr)
library(dplyr)
library(sf)
library(bayesplot)
library(loo)
library(gt)
library(terra)
library(caret)
library(tidyterra)
library(tidyr)
library(VicmapR)
library(kableExtra)
check_cmdstan_toolchain(fix = TRUE, quiet = TRUE)
register_knitr_engine(override = FALSE)
options(mc.cores=8)
#### Database connection ####
con <- weda::weda_connect(password = keyring::key_get(service = "ari-dev-weda-psql-01",
username = "psql_user"), username = "psql_user")
Additional functions used in the data preparation, modelling and analysis are available in the /functions directory.
# Source internal functions
sapply(list.files("functions", full.names = T), source, verbose = F)
Wrangle and format data for the STAN models for the various species.
Outline which species should be modeled, and which projects to source data from.
# Species to run model for.
deer_species_all <- c("Cervus unicolor", "Dama dama", "Cervus elaphus", "Axis porcinus")
# Projects to select.
project_short_name <- c("hog_deer_2023", "StatewideDeer")
# Buffer for data extraction.
spatial_buffer <- 1000
# Covariate Rasters
raster_files <- "/Volumes/DeerVic\ Photos/Processed_Rasters"
# raster_files <- "data/prediction_raster"
prediction_raster <- "data/prediction_raster/statewide_raster.tif"
# For the integrated model we place limits on the maximum density of deer to integrate over. In cases where there are no detections on the camera this is limited to 15 per km2. In cases where deer were detetected on the camera this is expanded to 50. We believe this is sufficiently high. Very high values of these will be less efficient.
n_max_no_det <- 15
n_max_det <- 50
Download the camera locations from the database, this table outlines the locations and the deployment history of the cameras.
cams_curated <- tbl(con, dbplyr::in_schema("camtrap", "curated_camtrap_operation")) %>%
dplyr::filter(ProjectShortName %in% !!project_short_name) %>% # Only retrieve cam records for hog deer 2023
dplyr::collect() %>%
dplyr::arrange(SiteID) %>%
sf::st_as_sf(., coords = c("Longitude", "Latitude"), crs = 4283)
n_site <- nrow(cams_curated)
Here we outline formulas to be used in the models. The formulas account for the various fixed-effect parameters.
#### Model formulas ####
#### Transect Formula: Survey Only ####
transect_formula <- ~Survey
#### Abundance Formula Options ####
# Simple Formula
ab_formula_2 <- ~ scale(sqrt(BareSoil)) + scale(sqrt(Nitrogen)) + scale(sqrt(PastureDistance)) + scale(TreeDensity) + scale(sqrt(ForestEdge))
# Simple with Coastal Distance: Hog Deer
ab_formula_3 <- ~ scale(sqrt(BareSoil)) + scale(sqrt(Nitrogen)) + scale(sqrt(PastureDistance)) + scale(TreeDensity) + scale(sqrt(ForestEdge)) + scale(dist_coast)
#### Detection Formula: Distance-sampling ####
det_formula <- ~ scale(HerbaceousUnderstoryCover) # average detection across all sites #scale(HerbaceousUnderstoryCover)
det_formula2 <- ~1
Using the prepare_model_data() function we generate the data for the STAN model. This function will:
1. Download data from the database 2. Format that data to match the distance-sampling bins
3. Organise the counts into sites and group sizes
4. Generate model matrix of the various submodels (distance-sampling, abundance and transect) 5. Generate data for the prediction process 6. Generate data for the random effect (bioregion)
7. Generate data for regional predictions (indexing based on DEECA regions)
model_data_file <- "data/model_data.rds"
if(!file.exists(model_data_file)) {
evaluate_transects <- c(TRUE, TRUE, TRUE, TRUE)
model_data <- list()
for(i in 1:length(deer_species_all)) {
if(deer_species_all[i] == "Cervus elaphus") {
det_to_use <- det_formula2
} else {
det_to_use <- det_formula
}
if(deer_species_all[i] == "Axis porcinus") {
ab_to_use <- ab_formula_3
} else {
ab_to_use <- ab_formula_2
}
model_data[[i]] <- prepare_model_data(species = deer_species_all[i],
projects = project_short_name,
buffer = spatial_buffer,
detection_formula = det_to_use,
abundance_formula = ab_to_use,
transect_formula = transect_formula,
con = con,
raster_dir = raster_files,
prediction_raster = prediction_raster,
n_max_no_det = n_max_no_det,
n_max_det = n_max_det,
evaltransects = evaluate_transects[i],
snapshot_interval = 2,
hs_df = 1,
hs_df_global = 1,
hs_scale_global = 2/sqrt(nrow(cams_curated)), # ratio of expected non-zero to zero divided by total observation as per brms convention
hs_scale_slab = 1,
hs_df_slab = 4)
# not used in model but for plots later
model_data[[i]]$raw_data[is.na(model_data[[i]]$raw_data)] <- 0
model_data[[i]]$transects[is.na(model_data[[i]]$transects)] <- 0
}
names(model_data) <- deer_species_all
saveRDS(model_data, model_data_file)
} else {
model_data <- readRDS(model_data_file)
}
Below we list the MCMC setting for our model. We run models on eight parallel chains for 1,000 iterations each (300 warmup and 300 sampling). These setting provide us with 4,000 posterior draws (8 x 500).
# STAN settings
ni <- 250 # sampling iterations
nw <- 250 # warmup iterations
nc <- 6 # number of chains
These are the models used in the analysis. The first is an integrated model that requires transect and camera data. The second is a count-only model that just requires the camera data.
functions {
/* Efficient computation of the horseshoe prior
* Args:
* zb: standardized population-level coefficients
* global: global horseshoe parameters
* local: local horseshoe parameters
* scale_global: global scale of the horseshoe prior
* c2: positive real number for regularization
* Returns:
* population-level coefficients following the horseshoe prior
*/
vector horseshoe(vector zb, vector[] local, real[] global,
real scale_global, real c2) {
int K = rows(zb);
vector[K] lambda = local[1] .* sqrt(local[2]);
vector[K] lambda2 = square(lambda);
real tau = global[1] * sqrt(global[2]) * scale_global;
vector[K] lambda_tilde = sqrt(c2 * lambda2 ./ (c2 + tau^2 * lambda2));
return zb .* lambda_tilde * tau;
}
}
data {
int<lower=0> N; // number of observations
real delta; // bin width
int<lower=1> n_site; // site
int<lower=1> n_distance_bins; // distance bins
int<lower=1> n_gs; // number of group sizes
vector[n_gs] gs; // group sizes
vector[n_distance_bins] midpts; // midpt of each interval
real<lower=1> max_distance; // truncation distance
int<lower=1> max_int_dist; // max distance as an integer
real<lower=0> theta_frac; // fraction of camera view
array[n_site] int effort; // effort
array[n_site, n_gs] int n_obs; //number of observations at each site and for varying group sizes
array[n_site, n_distance_bins, n_gs] int y; // observations matrix
// summary of whether species is known to be present at each site
int<lower = 0, upper = 1> any_seen[n_site];
// number of surveys at each site
int<lower = 0> n_survey[n_site];
// availability prior information
real<lower=0> bshape; // availability shape
real<lower=0> bscale; // availability scale
// camera trap detection parameters
int<lower=0> det_ncb; // number of covariates for detection model
matrix[n_site, det_ncb] det_model_matrix; // detection model matrix
array[n_gs, n_distance_bins] real pa ; // averaged availability for multiple individuals
// Abundance/occupancy model matrix
int<lower = 1> m_psi; // number of covariates
matrix[n_site, m_psi] X_psi; // model matrix
// hs params
real<lower=0> hs_df; // horseshoe prior param: see brms documentation of hs() for explantations of data
real<lower=0> hs_df_global;
real<lower=0> hs_df_slab;
real<lower=0> hs_scale_global;
real<lower=0> hs_scale_slab;
//transect level information
int<lower=1> trans; // total number of transects across all sites for all methods
int<lower = 0, upper = 1> y2[trans]; // transect based binomial detections
int<lower = 0, upper = trans> start_idx[n_site]; // first "transect" of the site, which is the camera detection
int<lower = 0, upper = trans> end_idx[n_site]; // last transect of the site
int<lower=1> trans_det_ncb; // number of covariates for transect detection model
matrix[trans, trans_det_ncb] trans_pred_matrix; // transect detection model matrix
int<lower=1> n_max[n_site]; // max for poisson RN
// Prediction data
int<lower=1> npc; // number of prediction grid cells
matrix[npc, m_psi] X_pred_psi; // pred matrix
vector[npc] prop_pred; // offset for prediction
// bioregion RE
int<lower=1> np_bioreg; // number of bioregions
int<lower=1> site_bioreg[n_site]; // bioregion of each site
int<lower=1> pred_bioreg[npc]; // bioregion of each prediction grid
// DEECA region data
int<lower=1> np_reg; // number of regions
int<lower=1> site_reg[n_site]; // region of each site
int<lower=1> pred_reg[npc]; // region of each prediction grid
}
transformed data {
vector[n_site] survey_area; // total survey area, calculated from snapshot moments and camera field of view
vector[n_site] cam_seen; // whether any have been seen at a site on camera
for(i in 1:n_site) {
survey_area[i] = theta_frac * effort[i] * pi() * (max_distance/1000)^2;
cam_seen[i] = sum(n_obs[i,]);
}
// for (i in 1:n_distance_bins) {
// //pi[i] = delta/max_distance; // availability function (line transect)
// pi[i] = (2 * delta * midpts[i])/max_distance^2; // point transect
// }
}
parameters {
// abundance parameters
simplex[n_gs] eps_ngs; // random variation in group size
vector[1] beta_intercept; // intercept parameter (not subject to horseshoe)
// detection parameters
vector[det_ncb] beta_det; // coefficients for detection submodel
// transect detection parameters
vector[trans_det_ncb] beta_trans_det; // coefficients for transect submodel
// temporal availability parameters
real<lower=0, upper=1> activ; // average activity of deer, equates to the time they are available for detection
// bioregion RE
real<lower=0> bioregion_sd; // standard deviation for bioregion random effect
vector[np_bioreg] bioregion_raw; // bioregion-level effect
// local parameters for horseshoe prior
vector[m_psi-1] zb;
vector<lower=0>[m_psi-1] hs_local[2];
// horseshoe shrinkage parameters
real<lower=0> hs_global[2];
real<lower=0> hs_c2;
}
transformed parameters {
// random effects
vector[np_bioreg] eps_bioregion; // bioregion random effect
// distance parameters
array[n_site] real log_sigma; // log of distance sampling detection function sigma
array[n_site] real sigma; // distance sampling detection function
array[n_site, n_distance_bins] real p_raw; // detection probability
array[n_site, n_distance_bins, n_gs] real p_raw_scale; // detection probability for point independence model and accounting for availability
array[n_site, n_distance_bins, n_gs] real<upper=0> log_p_raw; // log detection probability accounting for availability
array[n_site, n_gs] real log_p; // log probability with summed exponential for the multinomial logit model
array[n_site, n_gs] real<lower=0,upper=1> p; // log probability with summed exponential for the multinomial logit model
// abundance parameters
array[n_site, n_gs] real<lower=0> lambda; // expected number of groups for the camera surveys
// activity parameters
real log_activ = log(activ);
vector[trans] logit_trans_p = trans_pred_matrix * beta_trans_det; // observation process model
// lp_site for RN model
vector[n_site] lp_site; // log-probability for each site based on RN loops
vector[trans] r = inv_logit(logit_trans_p); // probability of detection for each transect observation/period, the first at each site is the camera-level detection rate
vector[n_site] log_lambda_psi; //log lambda for estimating number of groups per site
// hs betas
vector[m_psi-1] beta_covs; // covariates with horseshow prior
vector[m_psi] beta_psi; // covariates combined with intercept
beta_covs = horseshoe(zb, hs_local, hs_global, hs_scale_global, hs_scale_slab^2 * hs_c2); // horseshoe bior placed on beta covariates
beta_psi = append_row(beta_intercept, beta_covs); // combine intercept and covariates
for(b in 1:np_bioreg) {
eps_bioregion[b] = bioregion_sd * bioregion_raw[b]; // generate bioregion random effect
}
for(n in 1:n_site) {
log_sigma[n] = det_model_matrix[n,] * beta_det; // calculate sigma from detection function
sigma[n] = exp(log_sigma[n]);
for (i in 1:n_distance_bins) {
// assuming a half-normal detection fn from line
p_raw[n,i] = exp(-(midpts[i])^2/(2*sigma[n]^2)); // half-normal function (pg 419 of AHM)
// hazard function
// p_raw[n,i] = 1 - exp(-(midpts[i]/theta)^(-sigma[n])); // hazard function (pg 507 of AHM)
for(j in 1:n_gs) {
p_raw_scale[n,i,j] = p_raw[n,i]*pa[j,i]; // pr(animal occurs and is detected in bin i)
log_p_raw[n,i,j] = log(p_raw_scale[n,i,j]);
}
}
log_lambda_psi[n] = X_psi[n,] * beta_psi + eps_bioregion[site_bioreg[n]]; //site-level estimate of abundance of groups
// for various group sizes break down the estimated detection rates and the estimated camera counts
for(j in 1:n_gs) {
log_p[n,j] = log_sum_exp(log_p_raw[n,,j]);
p[n,j] = exp(log_p[n,j]);
// model site abundance taking into account true abundance, distance sampling (log_p),
// temporal availability (log_activ), proportional group size (epgs_ngs), and spatial
// availability/probability of camera-level detection (r[start_idx[n]])
// offset is the survey area, which includes the snapshot moments and area in camera field of view
lambda[n,j] = exp(log_lambda_psi[n] + log_p[n,j] + log_activ + log(eps_ngs[j]) + log(r[start_idx[n]])) .* survey_area[n];
}
// Royle-Nichols implementation in STAN (looping over possible discrete values of N)
// https://discourse.mc-stan.org/t/royle-and-nichols/14150
// https://discourse.mc-stan.org/t/identifiability-across-levels-in-occupancy-model/5340/2
if (n_survey[n] > 0) {
vector[n_max[n] - any_seen[n] + 1] lp;
// seen
if(any_seen[n] == 0){ // not seen
lp[1] = poisson_lpmf(0 | exp(log_lambda_psi[n]));
}
// not seen
// lp 1 simplification (not necessary)
else lp[1] = poisson_lpmf(1 | exp(log_lambda_psi[n])) +
bernoulli_lpmf(y2[start_idx[n]:end_idx[n]] | r[start_idx[n]:end_idx[n]]);
// loop through possible values for maximum count (km2)
for (j in 2:(n_max[n] - any_seen[n] + 1)){
lp[j] = poisson_lpmf(any_seen[n] + j - 1 | exp(log_lambda_psi[n]))
+ bernoulli_lpmf(y2[start_idx[n]:end_idx[n]] | 1 - (1 - r[start_idx[n]:end_idx[n]])^(any_seen[n] + j - 1));
}
lp_site[n] = log_sum_exp(lp);
} else{
lp_site[n] = 0;
}
}
}
model {
beta_det ~ normal(0, 4); // prior for sigma
eps_ngs ~ uniform(0, 1); // prior for group size effect
beta_intercept ~ normal(-3, 3); // prior for intercept in poisson model
beta_trans_det ~ normal(0, 3); // beta for transect detection
activ ~ beta(bshape, bscale); //informative prior
bioregion_sd ~ normal(0, 2); // prior for bioregion RE SD
bioregion_raw ~ normal(0,1); // prior for bioregion RE effect
for(n in 1:n_site) {
for(j in 1:n_gs) {
target += poisson_lpmf(n_obs[n,j] | lambda[n,j]);
y[n,,j] ~ multinomial_logit(to_vector(log_p_raw[n,,j]));
}
target += lp_site[n];
}
// priors including all constants: poisson
target += normal_lpdf(zb | 0, 1);
target += normal_lpdf(hs_local[1] | 0, 1)
- 101 * log(0.5);
target += inv_gamma_lpdf(hs_local[2] | 0.5 * hs_df, 0.5 * hs_df);
target += normal_lpdf(hs_global[1] | 0, 1)
- 1 * log(0.5);
target += inv_gamma_lpdf(hs_global[2] | 0.5 * hs_df_global, 0.5 * hs_df_global);
target += inv_gamma_lpdf(hs_c2 | 0.5 * hs_df_slab, 0.5 * hs_df_slab);
}
generated quantities {
array[n_site,n_gs] real n_obs_pred;
array[n_site, n_gs] real n_obs_true;
array[n_site] real N_site;
array[n_site] real N_site_pred;
array[n_site, max_int_dist+1] real DetCurve;
array[n_site, n_gs] real log_lik1;
array[n_site, n_gs] real log_lik2;
array[n_site] real log_lik;
vector[n_site] Site_lambda;
vector[n_site] psi;
array[npc] real pred;
array[npc] real pred_trunc;
array[np_reg] real Nhat_reg;
real av_gs;
real Nhat;
real Nhat_trunc;
int trunc_counter;
trunc_counter = 0;
for(n in 1:n_site) {
for(j in 1:n_gs) {
log_lik1[n,j] = multinomial_logit_lpmf(y[n,,j] | to_vector(log_p_raw[n,,j])); //for loo
log_lik2[n,j] = poisson_lpmf(n_obs[n,j] | lambda[n,j]); //for loo
n_obs_true[n, j] = gs[j] * (poisson_log_rng(log_lambda_psi[n] + log(eps_ngs[j])));
n_obs_pred[n,j] = gs[j] * poisson_rng(lambda[n,j]);
}
// get loglik on a site level
log_lik[n] = log_sum_exp(log_sum_exp(log_sum_exp(log_lik1[n,]), log_sum_exp(log_lik2[n,])), lp_site[n]);
Site_lambda[n] = exp(log_lambda_psi[n]);
N_site[n] = sum(n_obs_true[n,]);
N_site_pred[n] = sum(n_obs_pred[n,]);
// Occupancy probability transformation
psi[n] = inv_cloglog(log_lambda_psi[n]);
// loop over distance bins
for(j in 0:max_int_dist) { // get DS predictions for distance 0 to max bin distance
DetCurve[n, j+1] = exp(-(j+0.5)^2/(2*sigma[n]^2)); // half normal
// DetCurve[n, j+1] = 1 - exp(-(j+0.5/theta)^(-sigma[n])); //hazard rate
}
}
av_gs = sum(gs .* eps_ngs);
for(i in 1:np_reg) Nhat_reg[i] = 0;
for(i in 1:npc) {
pred[i] = poisson_log_rng(X_pred_psi[i,] * beta_psi + eps_bioregion[pred_bioreg[i]]) * prop_pred[i] * av_gs; //offset
if(pred[i] > max(N_site)) {
pred_trunc[i] = max(N_site);
trunc_counter += 1;
} else {
pred_trunc[i] = pred[i];
} // upper limit placed at highest site estimate
Nhat_reg[pred_reg[i]] += pred[i];
}
Nhat = sum(pred);
Nhat_trunc = sum(pred_trunc);
}functions {
/* Efficient computation of the horseshoe prior
* Args:
* zb: standardized population-level coefficients
* global: global horseshoe parameters
* local: local horseshoe parameters
* scale_global: global scale of the horseshoe prior
* c2: positive real number for regularization
* Returns:
* population-level coefficients following the horseshoe prior
*/
vector horseshoe(vector zb, vector[] local, real[] global,
real scale_global, real c2) {
int K = rows(zb);
vector[K] lambda = local[1] .* sqrt(local[2]);
vector[K] lambda2 = square(lambda);
real tau = global[1] * sqrt(global[2]) * scale_global;
vector[K] lambda_tilde = sqrt(c2 * lambda2 ./ (c2 + tau^2 * lambda2));
return zb .* lambda_tilde * tau;
}
}
data {
int<lower=0> N; // number of observations
real delta; // bin width
int<lower=1> n_site; // site
int<lower=1> n_distance_bins; // distance bins
int<lower=1> n_gs; // number of group sizes
vector[n_gs] gs; // group sizes
vector[n_distance_bins] midpts; // midpt of each interval
real<lower=1> max_distance; // truncation distance
int<lower=1> max_int_dist; // max distance as an integer
real<lower=0> theta_frac; // fraction of camera view
array[n_site] int effort; // effort
array[n_site, n_gs] int n_obs; //number of observations
array[n_site, n_distance_bins, n_gs] int y; // observations matrix
// summary of whether species is known to be present at each site
int<lower = 0, upper = 1> any_seen[n_site];
// number of surveys at each site
int<lower = 0> n_survey[n_site];
// availability prior information
real<lower=0> bshape; // availability shape
real<lower=0> bscale; // availability scale
// camera trap detection parameters
int<lower=0> det_ncb; // number of covariates for detection model
matrix[n_site, det_ncb] det_model_matrix; // detection model matrix
array[n_gs, n_distance_bins] real pa ; // averaged availability for multiple individuals
// Abundance/occupancy model matrix
int<lower = 1> m_psi;
matrix[n_site, m_psi] X_psi;
// negbinom scale
// real reciprocal_phi_scale;
// hs params
real<lower=0> hs_df; // horseshoe prior param
real<lower=0> hs_df_global;
real<lower=0> hs_df_slab;
real<lower=0> hs_scale_global;
real<lower=0> hs_scale_slab;
//transect level information
int<lower=1> trans; // total number of transects across all sites for all methods
int<lower = 0, upper = 1> y2[trans]; // transect based binomial detections
int<lower = 0, upper = trans> start_idx[n_site];
int<lower = 0, upper = trans> end_idx[n_site];
int<lower=1> trans_det_ncb; // number of covariates for transect detection model
matrix[trans, trans_det_ncb] trans_pred_matrix; // transect detection model matrix
int<lower=1> n_max[n_site]; // max for poisson RN
// Prediction data
int<lower=1> npc; // number of prediction grid cells
matrix[npc, m_psi] X_pred_psi; // pred matrix
vector[npc] prop_pred; //offset
// bioregion RE
int<lower=1> np_bioreg;
int<lower=1> site_bioreg[n_site];
int<lower=1> pred_bioreg[npc];
// region data
int<lower=1> np_reg;
int<lower=1> site_reg[n_site];
int<lower=1> pred_reg[npc];
}
transformed data {
// vector[n_distance_bins] pi; // availability for point
vector[n_site] survey_area;
vector[n_site] cam_seen;
for(i in 1:n_site) {
survey_area[i] = theta_frac * effort[i] * pi() * (max_distance/1000)^2;
cam_seen[i] = sum(n_obs[i,]);
}
}
parameters {
// abundance parameters
simplex[n_gs] eps_ngs; // random variation in group size
vector[1] beta_intercept;
vector[det_ncb] beta_det;
// transect detection parameters
vector[trans_det_ncb] beta_trans_det;
// temporal availability parameters
real<lower=0, upper=1> activ;
// bioregion RE
real<lower=0> bioregion_sd;
vector[np_bioreg] bioregion_raw;
// local parameters for horseshoe prior
vector[m_psi-1] zb;
vector<lower=0>[m_psi-1] hs_local[2];
// horseshoe shrinkage parameters
real<lower=0> hs_global[2];
real<lower=0> hs_c2;
}
transformed parameters {
// random effects
vector[np_bioreg] eps_bioregion; // bioregion random effect
// distance parameters
array[n_site] real log_sigma;
array[n_site] real sigma;
array[n_site, n_distance_bins] real p_raw; // detection probability
array[n_site, n_distance_bins, n_gs] real p_raw_scale; // detection probability for point independence model and accounting for availability
array[n_site, n_distance_bins, n_gs] real<upper=0> log_p_raw; // log detection probability accounting for availability
array[n_site, n_gs] real log_p; // log probability with summed exponential for the multinomial logit model
array[n_site, n_gs] real<lower=0,upper=1> p; // log probability with summed exponential for the multinomial logit model
// abundance parameters
array[n_site, n_gs] real<lower=0> lambda;
// activity parameters
real log_activ = log(activ);
// lp_site for RN model
vector[n_site] log_lambda_psi;
// hs betas
vector[m_psi-1] beta_covs;
vector[m_psi] beta_psi;
beta_covs = horseshoe(zb, hs_local, hs_global, hs_scale_global, hs_scale_slab^2 * hs_c2);
beta_psi = append_row(beta_intercept, beta_covs);
for(b in 1:np_bioreg) {
eps_bioregion[b] = bioregion_sd * bioregion_raw[b];
}
for(n in 1:n_site) {
log_sigma[n] = det_model_matrix[n,] * beta_det;
sigma[n] = exp(log_sigma[n]);
for (i in 1:n_distance_bins) {
// assuming a half-normal detection fn from line
p_raw[n,i] = exp(-(midpts[i])^2/(2*sigma[n]^2)); // half-normal function (pg 419 of AHM)
// hazard function
// p_raw[n,i] = 1 - exp(-(midpts[i]/theta)^(-sigma[n])); // hazard function (pg 507 of AHM)
for(j in 1:n_gs) {
p_raw_scale[n,i,j] = p_raw[n,i]*pa[j,i]; // pr(animal occurs and is detected in bin i)
log_p_raw[n,i,j] = log(p_raw_scale[n,i,j]);
}
}
log_lambda_psi[n] = X_psi[n,] * beta_psi + eps_bioregion[site_bioreg[n]];
for(j in 1:n_gs) {
log_p[n,j] = log_sum_exp(log_p_raw[n,,j]);
p[n,j] = exp(log_p[n,j]);
// model site abundance
lambda[n,j] = exp(log_lambda_psi[n] + log_p[n,j] + log_activ + log(eps_ngs[j])) .* survey_area[n];
}
}
}
model {
beta_det ~ normal(0, 4); // prior for sigma
eps_ngs ~ uniform(0, 1); // prior for group size effect
beta_intercept ~ normal(-3, 3); // prior for intercept in poisson model
activ ~ beta(bshape, bscale); //informative prior
bioregion_sd ~ normal(0, 2);
bioregion_raw ~ normal(0,1);
for(n in 1:n_site) {
for(j in 1:n_gs) {
target += poisson_lpmf(n_obs[n,j] | lambda[n,j]);
y[n,,j] ~ multinomial_logit(to_vector(log_p_raw[n,,j]));
}
}
// priors including all constants: poisson
target += normal_lpdf(zb | 0, 1);
target += normal_lpdf(hs_local[1] | 0, 1)
- 101 * log(0.5);
target += inv_gamma_lpdf(hs_local[2] | 0.5 * hs_df, 0.5 * hs_df);
target += normal_lpdf(hs_global[1] | 0, 1)
- 1 * log(0.5);
target += inv_gamma_lpdf(hs_global[2] | 0.5 * hs_df_global, 0.5 * hs_df_global);
target += inv_gamma_lpdf(hs_c2 | 0.5 * hs_df_slab, 0.5 * hs_df_slab);
}
generated quantities {
array[n_site,n_gs] real n_obs_pred;
array[n_site, n_gs] real n_obs_true;
array[n_site] real N_site;
array[n_site] real N_site_pred;
array[n_site, max_int_dist+1] real DetCurve;
array[n_site, n_gs] real log_lik1;
array[n_site, n_gs] real log_lik2;
array[n_site] real log_lik;
vector[n_site] Site_lambda;
vector[n_site] psi;
array[npc] real pred;
array[npc] real pred_trunc;
array[np_reg] real Nhat_reg;
real av_gs;
// array[np_reg] real Nhat_reg_design;
real Nhat;
real Nhat_trunc;
int trunc_counter;
trunc_counter = 0;
for(n in 1:n_site) {
for(j in 1:n_gs) {
log_lik1[n,j] = multinomial_logit_lpmf(y[n,,j] | to_vector(log_p_raw[n,,j])); //for loo
log_lik2[n,j] = poisson_lpmf(n_obs[n,j] | lambda[n,j]); //for loo
n_obs_true[n, j] = gs[j] * (poisson_log_rng(log_lambda_psi[n] + log(eps_ngs[j])));
n_obs_pred[n,j] = gs[j] * poisson_rng(lambda[n,j]);
}
// get loglik on a site level
log_lik[n] = log_sum_exp(log_sum_exp(log_lik1[n,]), log_sum_exp(log_lik2[n,]));
Site_lambda[n] = exp(log_lambda_psi[n]);
N_site[n] = sum(n_obs_true[n,]);
N_site_pred[n] = sum(n_obs_pred[n,]);
// Occupancy probability transformation
psi[n] = inv_cloglog(log_lambda_psi[n]);
// loop over distance bins
for(j in 0:max_int_dist) { // get DS predictions for distance 0 to max bin distance
DetCurve[n, j+1] = exp(-(j+0.5)^2/(2*sigma[n]^2)); // half normal
// DetCurve[n, j+1] = 1 - exp(-(j+0.5/theta)^(-sigma[n])); //hazard rate
}
}
av_gs = sum(gs .* eps_ngs);
for(i in 1:np_reg) Nhat_reg[i] = 0;
for(i in 1:npc) {
pred[i] = poisson_log_rng(X_pred_psi[i,] * beta_psi + eps_bioregion[pred_bioreg[i]]) * prop_pred[i] * av_gs; //offset
if(pred[i] > max(N_site)) {
pred_trunc[i] = max(N_site);
trunc_counter += 1;
} else {
pred_trunc[i] = pred[i];
} // upper limit placed at highest site estimate
Nhat_reg[pred_reg[i]] += pred[i];
}
Nhat = sum(pred);
Nhat_trunc = sum(pred_trunc);
}model_poisson <- cmdstan_model(here::here("stan", "count_det_nondet_poisson.stan"))
model_poisson_co <- cmdstan_model(here::here("stan", "count_only_poisson.stan"))
We can fit models using cmdstanr. Here we fit models using a poisson distribution. One model for each of the four deer species. To aid convergence adapt_delta is increased from the default value.
integrated_model <- c(TRUE, TRUE, TRUE, FALSE)
distributions <- c("poisson")
model_fits <- list()
for(j in 1:length(distributions)) {
for(i in 1:length(deer_species_all)) {
if(integrated_model[i]) {
model_to_fit <- get(paste0("model_", distributions[j]))
} else {
model_to_fit <- get(paste0("model_", distributions[j], "_co"))
}
model_fits[[i]] <- model_to_fit$sample(data = model_data[[i]],
chains = nc,
parallel_chains = nc,
init = 0.1,
max_treedepth = 10,
refresh = 20,
step_size = 0.01,
adapt_delta = 0.99,
show_messages = TRUE,
save_warmup = FALSE,
iter_sampling = ni,
iter_warmup = nw)
model_fits[[i]]$save_object(paste0("outputs/models/fit_",
distributions[j],
"_",
deer_species_all[i],".rds"))
}
}
Our strategy for model evaluation is to determine if the models of the sparse regression approach (horseshoe-prior regularised model) preform sufficiently well against a null model (no abundance fixed-effect covariates).
We use leave-one-out cross-validation (LOO-CV); a Bayesian model evaluation process (Vehtari, Gelman, and Gabry 2017; Vehtari et al. 2020) to compare the model against a null model. As long as the model with ‘all’ parameters performs better than the null models it is suitable to use it exclusively for all analyses and predictions. Importantly, the cross-validation also gives us the ability to compare the predictive performance against a null model. In doing so we see that the model with abundance covariates performs better than a null model. Noting that the null model will still include a random bioregion effect.
loo_compares <- list()
loo_compare_tables <- list()
for(i in 2:length(deer_species_all)) {
which_models <- model_fits[stringr::str_detect(models_to_read, pattern = deer_species_all[i])]
loo_compares[[i]] <- list()
for(j in 1:length(which_models)) {
loo_compares[[i]][[j]] <- which_models[[j]]$loo(cores = 6)
}
names(loo_compares[[i]]) <- names(which_models)
loo_compare_tables[[i]] <- loo::loo_compare(loo_compares[[i]]) %>% as.data.frame()
loo_compare_tables[[i]]$Species <- deer_species_all[i]
}
names(loo_compare_tables) <- deer_species_all[1:2]
loo_table_all <- bind_rows(loo_compare_tables) %>%
tibble::rownames_to_column(var = "model_full") %>%
mutate(model = stringr::str_extract(model_full, "poisson|negbin")) %>%
dplyr::select(Species, model, everything(), -model_full) %>%
arrange(Species) %>%
as.data.frame()
# Plot loo table
gt(loo_table_all,
groupname_col = "Species") %>%
fmt_number(decimals = 2) %>%
tab_style(locations = cells_row_groups(), style = cell_text(weight = "bold"))
Posterior predictive checks allow us to compare the observed data to the model-generated data. For each species we undertake posterior predictive checks for summary statistics relating to the number of deer seen on the cameras at each site. Ideally a well-fit model is able to make predictions that match the observed data. The summary statistics we use for the posterior predictive checks are:
q95 <- function(x) quantile(x, 0.95, na.rm = T)
# q25 <- function(x) quantile(x, 0.25, na.rm = T)
q75 <- function(x) quantile(x, 0.75, na.rm = T)
# sd_90 <- function (x, na.rm = FALSE) {
# quants <- quantile(x, c(0.0, 0.95), na.rm = T)
# x <- x[x < quants[2] & x > quants[1]]
# sqrt(var(if (is.vector(x) || is.factor(x)) x else as.double(x),
# na.rm = na.rm))
# }
funs <- c(q95, mean, sd, ppc_scatter_avg)
titles <- c("95% Percentile", "Mean", "SD", "Average Site Counts")
ppc_sambar <- list()
# funs <- c(prop_zero, mean, q90, sd)
for(i in 1:length(funs)) {
ppc_sambar[[i]] <- posterior_checks(model = model_fits[[1]],
model_data = model_data$`Cervus unicolor`,
stat = funs[[i]],
integrated = T,
title = titles[i])
}
cowplot::plot_grid(plotlist = ppc_sambar, labels = "AUTO")
Figure 1: Posterior predictive checks for Sambar Deer
Figure 2: Posterior predictive checks for Fallow Deer
funs <- c(max, mean, sd, ppc_scatter_avg)
titles <- c("Maximum", "Mean", "SD", "Average Site Counts")
ppc_red <- list()
for(i in 1:length(funs)) {
ppc_red[[i]] <- posterior_checks(model = model_fits[[3]],
model_data = model_data$`Cervus elaphus`,
stat = funs[[i]],
integrated = T, only_det = F,
title = titles[i])
}
cowplot::plot_grid(plotlist = ppc_red)
Figure 3: Posterior predictive checks for Red Deer
Figure 4: Posterior predictive checks for Hog Deer
Within the STAN model we generate predictions for sampled and unsampled locations. This provides us with site-level abundance estimates as well as estimates across all (unsampled) public forest.
Visualisation of point-estimates for the various deer species surveyed for in this study.
vic_regions <- vicmap_query("open-data-platform:delwp_region") %>%
collect() %>%
st_transform(3111) %>%
st_simplify(dTolerance = 500)
site_preds <- function(model, cams_curated) {
rn_dens <- model$summary("N_site")
density_at_sites_rn <- cbind(cams_curated, rn_dens)
return(density_at_sites_rn)
}
site_density_plot <- function(densities, regions, species) {
densities$density <- cut(densities$mean,
breaks = c(0, 0.1, 1, 3, 5, 10, max(densities$mean)),
labels = c("<0.1", "0.1 - 1", "1-3", "3 - 5", "5 - 10", "10+"), include.lowest = T, right = T)
if(species == "Hog") {
regions <- regions %>% filter(delwp_region == "GIPPSLAND")
densities <- densities %>% st_filter(regions %>% st_transform(4283))
}
plot <- ggplot2::ggplot(data = densities) +
ggplot2::geom_sf(data = regions, alpha = 0.75, fill = "grey80") +
ggplot2::geom_sf(aes(fill = density, alpha = mean), shape = 21, size = 4) +
scale_fill_viridis_d(name = "", guide = guide_legend(override.aes = list(size = 6))) +
scale_alpha_continuous(range = c(0.5,1), guide = "none") +
labs(title = paste0('Density of ',species ,' Deer'), fill = bquote('Deer per'~km^2)) +
theme_bw() +
theme(legend.text = element_text(size = 18), legend.key.size = unit(1, "cm"),
title = element_text(size = 22))
return(plot)
}
sambar_preds <- site_preds(model_fits[[1]], cams_curated = cams_curated)
fallow_preds <- site_preds(model_fits[[2]], cams_curated = cams_curated)
red_preds <- site_preds(model_fits[[3]], cams_curated = cams_curated)
hog_preds <- site_preds(model_fits[[4]], cams_curated = cams_curated)
cowplot::plot_grid(site_density_plot(sambar_preds, vic_regions, species = "Sambar"),
site_density_plot(fallow_preds, vic_regions, species = "Fallow"),
site_density_plot(red_preds, vic_regions, species = "Red"),
site_density_plot(hog_preds, vic_regions, species = "Hog"), ncol = 1)
Figure 5: Site-level density estimates for all sites sampled as part of statewide and hog deer surveys
As a sanity-check we compare the average modelled densities at sites with (i) no evidence of deer, (ii) evidence of deer present on transects, and (iii) evidence of deer present on cameras. We expect that average densities are generally higher at sites that detected some form of deer than sites that did not detect any sign of deer. Additionally, we would also expect average densities to be generally higher at sites that had detections on cameras than those with only detections from transects. The table below shows these expectations to be correct for Sambar and partially for Fallow. Red Deer had too few detections for this check to be comprehensive but alongside Hog Deer, Red Deer density was higher at sites with detections on the camera than those where they weren’t seen.
density_summary_table <- function(preds, model_data, species) {
cam_seen <- as.integer(as.logical(rowSums(model_data$n_obs)))
preds_sum <- preds %>%
st_drop_geometry() %>%
mutate(`Species` = species,
`CameraDetection` = cam_seen,
`AnyDetection` = model_data$any_seen,
`Detection` = case_when(CameraDetection == 0 & AnyDetection == 0 ~ "Not seen",
CameraDetection == 0 & AnyDetection == 1 ~ "Only detected on transects",
CameraDetection == 1 & AnyDetection == 1 ~ "Seen on cameras")) %>%
group_by(`Species`, `Detection`) %>%
summarise(`Number of Sites` = n(),
`Average Density` = mean(mean)) %>%
ungroup()
return(preds_sum)
}
density_summary <- bind_rows(
density_summary_table(sambar_preds, model_data[[1]], species = "Sambar"),
density_summary_table(fallow_preds, model_data[[2]], species = "Fallow"),
density_summary_table(red_preds, model_data[[3]], species = "Red"),
density_summary_table(hog_preds, model_data[[4]], species = "Hog"))
density_summary %>%
kbl(format = "html", digits = 2) %>%
kable_styling("striped") %>%
collapse_rows(1)
| Species | Detection | Number of Sites | Average Density |
|---|---|---|---|
| Sambar | Not seen | 180 | 0.63 |
| Only detected on transects | 33 | 2.00 | |
| Seen on cameras | 104 | 2.84 | |
| Fallow | Not seen | 253 | 0.81 |
| Only detected on transects | 34 | 0.73 | |
| Seen on cameras | 30 | 3.03 | |
| Red | Not seen | 304 | 0.06 |
| Only detected on transects | 1 | 1.26 | |
| Seen on cameras | 12 | 1.12 | |
| Hog | Not seen | 295 | 0.19 |
| Seen on cameras | 22 | 2.41 |
Within our model we calculate abundance/density for deer in each of the 73323 km2 of public land. Based on these spatial predictions we can estimate abundance at a regional level (6 DEECA regions) and across the whole state.
Using the model predictions (“pred”) for all suitable public land we generate a raster (1km2 resolution). We save the average spatial estimates under outputs/rasters (tif files) and also provide binned plots (png files) in outputs/plots.
pred_raster_full <- terra::rast(prediction_raster)
pred_raster <- terra::app(pred_raster_full[[stringr::str_subset(
stringr::str_remove_all(labels(terms(ab_formula_3)),
"scale[(]|[)]|sqrt[(]"),
pattern = "[*]", negate = T)]], mean)
mean_raster <- list()
mean_raster_discrete <- list()
for(i in 1:length(model_fits)) {
gp_preds_draws_all <- model_fits[[i]]$draws("pred", format = "matrix")
terra::values(pred_raster)[!is.na(terra::values(pred_raster))] <- apply(gp_preds_draws_all,
2,
mean,
na.rm = T,
trim = 0)
mean_raster[[i]] <- pred_raster
mean_raster_discrete[[i]] <- mean_raster[[i]]
max_pred <- max(values(mean_raster[[i]]), na.rm = T)
values(mean_raster_discrete[[i]]) <- cut(values(mean_raster_discrete[[i]]) ,
breaks = c(0, 0.1,1,3,5,10, max_pred),
include.lowest = T, right = T,
labels = c("0 - 0.1",
"0.1 - 1",
"1 - 3",
"3 - 5",
"5 - 10",
"10 +"))
}
# combine mean rasters together
combined_raster <- rast(mean_raster)
names(combined_raster) <- c("Average Sambar Deer Density (per km2)",
"Average Fallow Deer Density (per km2)",
"Average Red Deer Density (per km2)",
"Average Hog Deer Density (per km2)")
writeRaster(combined_raster, "outputs/rasters/combined_deer_average_density.tif", overwrite = T)
# reg <- VicmapR::vicmap_query("open-data-platform:delwp_region") %>%
# VicmapR::collect() %>%
# sf::st_transform(3111) %>%
# sf::st_simplify(dTolerance = 250)
state <- VicmapR::vicmap_query("open-data-platform:vmlite_victoria_polygon_su5") %>%
filter(state != "NSW" & state != "SA" & feature_type_code != "sea") %>%
VicmapR::collect() %>%
sf::st_transform(3111)
gippsland <- vic_regions %>% filter(delwp_region == "GIPPSLAND")
plot_abundance <- function(raster, state, species, crop = NULL) {
if(!is.null(crop)) {
raster <- terra::crop(raster, vect(crop), mask = T)
state <- crop
}
plot <- ggplot2::ggplot() +
ggplot2::geom_sf(data = state, alpha = 0.5, linewidth = 0.5, fill = "grey80") +
tidyterra::geom_spatraster(data = raster, na.rm = T) +
tidyterra::scale_fill_terrain_d(na.translate = FALSE) +
ggplot2::labs(fill = bquote('Deer per'~km^2), title = paste0("Average Density of ", species, " on Victorian Public Land")) +
ggspatial::annotation_scale()
return(plot)
}
sambar_abundance_plot <- plot_abundance(mean_raster_discrete[[1]],
state = state,
species = "Sambar Deer")
fallow_abundance_plot <- plot_abundance(mean_raster_discrete[[2]],
state = state,
species = "Fallow Deer")
red_abundance_plot <- plot_abundance(mean_raster_discrete[[3]],
state = state,
species = "Red Deer")
hog_abundance_plot <- plot_abundance(mean_raster_discrete[[4]],
state = state,
species = "Hog Deer",
crop = gippsland)
ggsave(plot = sambar_abundance_plot, filename = "outputs/plots/sambar_abundance_plot.png",
width = 2400, height = 1600, units = "px")
ggsave(plot = fallow_abundance_plot, filename = "outputs/plots/fallow_abundance_plot.png",
width = 2400, height = 1600, units = "px")
ggsave(plot = red_abundance_plot, filename = "outputs/plots/red_abundance_plot.png",
width = 2400, height = 1600, units = "px")
ggsave(plot = hog_abundance_plot, filename = "outputs/plots/hog_abundance_plot.png",
width = 2400, height = 1600, units = "px")
cowplot::plot_grid(sambar_abundance_plot, fallow_abundance_plot, red_abundance_plot, hog_abundance_plot, ncol = 1)
Figure 6: Abundance of Sambar, Fallow, Red and Hog Deer across Victoria, dark-grey area reflects area not included in predictions (i.e. not public land)
For each DEECA region we provide species-level estimates of abundance with 90 % confidence interval. We also calculate the modle-based average density within each region based on the abundance and the total area of public land within each region.
reg <- VicmapR::vicmap_query("open-data-platform:delwp_region") %>%
VicmapR::collect() %>%
dplyr::group_by(delwp_region) %>%
dplyr::summarise(geometry = sf::st_combine(geometry)) %>%
dplyr::ungroup() %>%
dplyr::mutate(delwp_region_fact = as.integer(factor(delwp_region)))
abundance_table <- function(model, regions, pred_area, caption) {
regional_abundance <- model$summary("Nhat_reg") %>%
dplyr::bind_rows(model$summary("Nhat")) %>%
dplyr::mutate(variable = c(regions$delwp_region, "TOTAL"),
`Area km2` = c(pred_area, sum(pred_area)),
`Average Density (per km2)` = round(mean/`Area km2`, 2)) %>%
dplyr::select(Region = variable,
Mean = mean,
Median = median,
SD = sd,
`5 %` = q5,
`90 %` = q95,
`Area km2`,
`Average Density (per km2)`)
kableExtra::kbl(regional_abundance, digits = 1, format = "html", caption = caption) %>%
kableExtra::kable_styling("striped") %>%
kableExtra::column_spec(1, bold = TRUE) %>%
kableExtra::row_spec(6, hline_after = T) %>%
kableExtra::row_spec(7, background = "#c2a5cf", color = "black", bold = T, hline_after = T)
}
abundance_table(model_fits[[1]], regions = reg, pred_area = table(model_data[[1]]$pred_reg), caption = "Regional estimates of Sambar Deer Density")
| Region | Mean | Median | SD | 5 % | 90 % | Area km2 | Average Density (per km2) |
|---|---|---|---|---|---|---|---|
| BARWON SOUTH WEST | 57.3 | 52.7 | 20.8 | 33.0 | 96.3 | 4764 | 0.0 |
| GIPPSLAND | 39153.5 | 38863.9 | 3863.1 | 33451.4 | 46067.7 | 24922 | 1.6 |
| GRAMPIANS | 758.5 | 749.0 | 108.4 | 594.5 | 950.8 | 9515 | 0.1 |
| HUME | 46838.3 | 46477.6 | 4657.8 | 39860.1 | 55014.5 | 16672 | 2.8 |
| LODDON MALLEE | 690.8 | 639.8 | 213.2 | 491.0 | 1065.3 | 15218 | 0.0 |
| PORT PHILLIP | 4001.2 | 3975.3 | 409.4 | 3387.7 | 4749.9 | 2232 | 1.8 |
| TOTAL | 91499.5 | 90701.7 | 8991.7 | 78056.8 | 107448.2 | 73323 | 1.2 |
abundance_table(model_fits[[2]], regions = reg, pred_area = table(model_data[[2]]$pred_reg), caption = "Regional estimates of Fallow Deer Density")
| Region | Mean | Median | SD | 5 % | 90 % | Area km2 | Average Density (per km2) |
|---|---|---|---|---|---|---|---|
| BARWON SOUTH WEST | 4162.8 | 4135.1 | 497.9 | 3400.0 | 5019.8 | 4764 | 0.9 |
| GIPPSLAND | 5290.4 | 5260.7 | 575.7 | 4392.8 | 6287.0 | 24922 | 0.2 |
| GRAMPIANS | 2456.4 | 2439.3 | 281.2 | 2022.6 | 2946.6 | 9515 | 0.3 |
| HUME | 17017.9 | 16934.4 | 1794.7 | 14275.3 | 20085.8 | 16672 | 1.0 |
| LODDON MALLEE | 1194.1 | 1171.2 | 205.8 | 916.3 | 1547.1 | 15218 | 0.1 |
| PORT PHILLIP | 2433.9 | 2418.2 | 272.8 | 2015.0 | 2893.3 | 2232 | 1.1 |
| TOTAL | 32555.5 | 32414.7 | 3364.9 | 27359.5 | 38299.6 | 73323 | 0.4 |
abundance_table(model_fits[[3]], regions = reg, pred_area = table(model_data[[3]]$pred_reg), caption = "Regional estimates of Red Deer Density")
| Region | Mean | Median | SD | 5 % | 90 % | Area km2 | Average Density (per km2) |
|---|---|---|---|---|---|---|---|
| BARWON SOUTH WEST | 2533.5 | 2441.7 | 612.7 | 1715.6 | 3665.1 | 4764 | 0.5 |
| GIPPSLAND | 795.0 | 768.2 | 195.0 | 527.8 | 1148.7 | 24922 | 0.0 |
| GRAMPIANS | 1965.7 | 1884.8 | 495.1 | 1294.7 | 2894.7 | 9515 | 0.2 |
| HUME | 2458.3 | 2407.3 | 514.7 | 1728.9 | 3377.5 | 16672 | 0.1 |
| LODDON MALLEE | 45.2 | 13.6 | 105.1 | 1.0 | 195.7 | 15218 | 0.0 |
| PORT PHILLIP | 20.3 | 19.2 | 8.4 | 8.7 | 35.8 | 2232 | 0.0 |
| TOTAL | 7818.1 | 7617.7 | 1581.5 | 5629.6 | 10578.1 | 73323 | 0.1 |
abundance_table(model_fits[[4]], regions = reg, pred_area = table(model_data[[4]]$pred_reg), caption = "Regional estimates of Hog Deer Density")
| Region | Mean | Median | SD | 5 % | 90 % | Area km2 | Average Density (per km2) |
|---|---|---|---|---|---|---|---|
| BARWON SOUTH WEST | 10.9 | 6.7 | 12.8 | 0.0 | 35.8 | 4764 | 0.0 |
| GIPPSLAND | 2416.0 | 2396.6 | 267.2 | 2026.2 | 2892.3 | 24922 | 0.1 |
| GRAMPIANS | 1.8 | 0.0 | 5.7 | 0.0 | 7.8 | 9515 | 0.0 |
| HUME | 2.3 | 1.1 | 3.9 | 0.0 | 9.1 | 16672 | 0.0 |
| LODDON MALLEE | 0.8 | 0.0 | 3.9 | 0.0 | 3.4 | 15218 | 0.0 |
| PORT PHILLIP | 364.6 | 361.8 | 45.5 | 295.8 | 445.2 | 2232 | 0.2 |
| TOTAL | 2796.5 | 2774.8 | 308.0 | 2346.6 | 3345.8 | 73323 | 0.0 |
Our model uses covariates to inform three seperate submodels:
At a site, there are two processes that may lead us to not record deer on a camera when in reality they occupy the site (whereby the site is the home range area surrounding the camera.) Firstly, deer may be available for detection and enter the sampling area in front of the camera. However, due to a function of their distance from the camera we fail to detect this individual. Secondly, they may occupy a site (known from transect signs) but never enter the camera field of view, in this case we estimate the various detection rates from the various methods. The detection rate of the camera in this method is regarded as the spatial availability of deer for camera trap sampling.
Below we show the average detection rate for a given group of deer in front of the camera (up to 12.5m). Detection rastes appear to be higher for Sambar than other species.
species_names <- c("Sambar Deer", "Fallow Deer", "Red Deer", "Hog Deer")
av_det_rates <- list()
for(i in 1:length(model_fits)) {
det_rates <- model_fits[[i]]$summary("p") %>%
mutate(var = stringr::str_extract(variable, "(?<=\\[).+?(?=\\])")) %>%
separate(var, into = c("site", "Group Size"), sep = ",")
av_det_rates[[i]] <- det_rates %>%
group_by(`Group Size`) %>%
summarise(mean = mean(mean)) %>%
ungroup() %>%
transmute(`Group Size`,
Species = species_names[i],
`Average Site Detection Probability` = mean)
}
av_det_rates %>%
bind_rows() %>%
arrange(`Group Size`) %>%
kbl("html", digits = 3, caption = "Average detection rates for the different species and different group sizes") %>%
kable_styling("striped") %>%
collapse_rows(1)
| Group Size | Species | Average Site Detection Probability |
|---|---|---|
| 1 | Sambar Deer | 0.451 |
| Fallow Deer | 0.413 | |
| Red Deer | 0.289 | |
| Hog Deer | 0.281 | |
| 2 | Sambar Deer | 0.569 |
| Fallow Deer | 0.546 | |
| Red Deer | 0.423 | |
| Hog Deer | 0.413 | |
| 3 | Sambar Deer | 0.644 |
| Fallow Deer | 0.628 | |
| Hog Deer | 0.502 | |
| 4 | Sambar Deer | 0.695 |
| Fallow Deer | 0.684 | |
| 5 | Sambar Deer | 0.732 |
| Fallow Deer | 0.724 |
For a group size of 1 we can generate detection function plots that show how detection rates fall-off over varying distances.
Note the following data suggests we should test hazard functions for Fallow, Red and Hog.
site_vars <- dplyr::tbl(con, dbplyr::in_schema("deervic", "curated_site_data")) %>%
dplyr::filter(SiteID %in% !!c(cams_curated$SiteID, "47191")) %>%
dplyr::collect() %>%
dplyr::mutate(HerbaceousUnderstoryCover = NNWHUCover + ENWHUCover,
SiteID = dplyr::case_when(SiteID == "47191" & CameraID == "HO04101053" ~ "47191A",
SiteID == "47191" & CameraID != "HO04101053" ~ "47191B",
TRUE ~ SiteID)) %>% # native + exotic herbaceous cover
dplyr::arrange(SiteID) %>%
as.data.frame()
n_distance_bins <- 5
delta <- 2.5
midpts <- c(1.25, 3.75, 6.25, 8.75, 11.25)
max_distance <- 12.5
detection_plot_HN <- list()
for(i in 1:length(species_names)) {
det_curve <- model_fits[[i]]$draws("DetCurve", format = "draws_matrix") %>%
as.data.frame() %>%
head(250) %>%
pivot_longer(cols = everything())
det_curve_wr <- det_curve %>%
mutate(var = stringr::str_extract(name, "(?<=\\[).+?(?=\\])")) %>%
separate(var, into = c("s", "Distance"), sep = ",")
det_vars_pred <- site_vars %>%
mutate(s = as.character(1:nrow(.)),
herbaceouslvl = cut(HerbaceousUnderstoryCover,
breaks = c(0, 25, 50, 75, 105),
labels = c("0 - 25%",
"25 - 50%",
"50 - 75%",
"75 - 100%"),
include.lowest = T, right = FALSE))
det_curve_sum <- det_curve_wr %>%
mutate(Distance = as.numeric(Distance)-1) %>%
left_join(det_vars_pred) %>%
group_by(herbaceouslvl, Distance) %>%
summarise(median = quantile(value, 0.5),
q5 = quantile(value, 0.05),
q95 = quantile(value, 0.95))
y_combined <- colSums(model_data[[i]]$y[,,1]) %>% # just for group size 1
as.data.frame() %>%
`colnames<-`("Count") %>%
mutate(Distance = midpts,
Prop = Count/(max(Count)),
CountS = Count/(2 * 2.5 * Distance)/max_distance^2,
PropS = CountS/(max(CountS)))
detection_plot_HN[[i]] <- ggplot(aes(x = Distance), data = det_curve_sum) +
geom_col(aes(y = PropS), fill = "grey70", colour = "grey20", width = 2.5, data = y_combined, alpha = 0.7) +
# geom_errorbar(aes(ymin = PropSq5, ymax = PropSq95), data = y_combined) +
# geom_line(aes(y = HNS)) +
geom_ribbon(aes(ymin = q5, ymax = q95, fill = herbaceouslvl), alpha = 0.5) +
geom_line(aes(y = median, colour = herbaceouslvl)) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0), limits = c(0,1)) +
scale_fill_brewer(palette = "Set1") +
scale_colour_brewer(palette = "Set1") +
ylab("Detection probability (p)") +
labs(fill = "Herbaceous\nUnderstorey\nCover", colour = "Herbaceous\nUnderstorey\nCover") +
theme_classic()
}
cowplot::plot_grid(plotlist = detection_plot_HN, ncol = 2)
For Sambar, Fallow and Red Deer we were able to estimate detection rates of the various methods used to detect deer (camera, footprints, pellets, rubbings and wallows).
det_data_species <- list()
for(j in 1:3) {
all_draws <- model_fits[[j]]$draws("beta_trans_det", format = "df")
det_marginal_effects <- list()
det_plot <- list()
obs_vars <- unlist(stringr::str_remove_all(string = labels(terms(transect_formula)), pattern = "log|scale|\\)|\\("))
obs_cols <- unlist(stringr::str_remove_all(string = colnames(model_data[[j]]$trans_pred_matrix), pattern = "log|scale|\\)|\\("))
obs_logs <- unlist(stringr::str_detect(string = colnames(model_data[[j]]$trans_pred_matrix), pattern = "log\\("))
obs_labs <- c("Survey Method")
fac <- c(TRUE, TRUE, TRUE, TRUE, TRUE)
fac2 <- c(TRUE)
det_obs <- model_data[[j]]$transects %>%
mutate(Survey = factor(Survey))
params_w_levs <- levels(det_obs$Survey)
for(i in 1:(length(obs_cols))) {
det_marginal_effects[[i]] <- marginal_effects_cmd(all_draws,
param = "beta_trans_det",
param_number = i, log = obs_logs[i],
model_data = det_obs,
model_column = obs_vars[c(1,attr(model_data[[j]]$trans_pred_matrix, "assign")[-1])[i]],
transition = FALSE) %>%
mutate(group = param,
param = params_w_levs[i],
factor = fac[i],
variable = as.numeric(variable))
if(fac[i]) {
det_marginal_effects[[i]] <- det_marginal_effects[[i]]
}
}
marginal_prob <- function(x, pwr = 3) {
xm <- 1-x
return(1-(xm^pwr))
}
det_marginal_effects_bind <- bind_rows(det_marginal_effects) %>%
rowwise() %>%
mutate(value = case_when(param != "Camera" ~ marginal_prob(value),
TRUE ~ value))
det_marginal_effects_split <- split(det_marginal_effects_bind, det_marginal_effects_bind$group)
for(i in 1:length(det_marginal_effects_split)) {
if(fac2[i]) {
plot_data <- det_marginal_effects_split[[i]] %>%
mutate(variable = param,
param = group)
} else {
plot_data <- det_marginal_effects_split[[i]]
}
det_plot[[i]] <- plot_data
}
det_data_species[[j]] <- bind_rows(det_plot) %>%
mutate(species = species_names[j])
}
data_for_plot <- bind_rows(det_data_species)
marginal_effects_plot_cmd_all(data_for_plot,
factor = TRUE,
ylab = "Detection probability") +
xlab("Survey method") +
scale_y_continuous(limits = c(0,1)) +
theme(legend.position = "top") +
scale_fill_brewer(palette = "Set1") +
scale_x_discrete(expand = c(0, 0))
Figure 7: Detection Probability for the various methods of survey. Camera trap detection probability is based on average deployment length, while transects are based on 3 independent transect searches
We used a spatially-derived random-effect of the bioregion of the sampled site. This random-effect allows us the ability to make predictions that include the variance associated with this random effect. This random-effect also minimises predictions in areas without deer detections (e.g. the mallee).
bioregion_contribution <- function(model, data, species) {
eps_bioregion <- model$draws("eps_bioregion", format = "matrix")
bioregion_data <- data[["bioreg_sf"]]
colnames(eps_bioregion) <- bioregion_data$bioregion
plot <- mcmc_areas(eps_bioregion, prob_outer = 0.9) +
ggtitle(species)
return(plot)
}
bio_plot_data <- list()
for(i in 1:length(species_names)) {
bio_plot_data[[i]] <- bioregion_contribution(model = model_fits[[i]],
data = model_data[[i]],
species = species_names[i])
}
cowplot::plot_grid(plotlist = bio_plot_data, ncol = 1)
Figure 8: Influence of the bioregion random effect on abundance (log-scale).
ab_joined_list <- list()
for(j in 1:length(species_names)) {
beta_draws <- model_fits[[j]]$draws("beta_psi", format = "df")
ab_marginal_effects <- list()
ab_plot <- list()
if(deer_species_all[j] == "Axis porcinus") {
ab_to_use <- ab_formula_3
} else {
ab_to_use <- ab_formula_2
}
phi_vars <- unlist(stringr::str_remove_all(string = labels(terms(ab_to_use)), pattern = "log|scale|sqrt|\\)|\\("))
phi_logs <- unlist(stringr::str_detect(string = labels(terms(ab_to_use)), pattern = "log\\("))
phi_sqrts <- c(0.5,0.5,0.5,1,0.5,1)
phi_labs <- c("Bare Soil (%)",
"Nitrogen (%)",
"Distance to Pastural Land (m)",
"Tree Density (%)",
'Length of forest edge in 1km2 (m)',
"Geographical Distance to coastline")
fac <- c(FALSE, FALSE, FALSE, FALSE, FALSE, FALSE)
for(i in 1:length(phi_vars)) {
ab_marginal_effects[[i]] <- marginal_effects_cmd(beta_draws,
param = "beta_psi",
param_number = i+1, log = phi_logs[i],
model_data = model_data[[j]][["raw_data"]],
abundance = TRUE,
pwr = phi_sqrts[i],
model_column = phi_vars[i],
transition = FALSE) %>%
mutate(species = species_names[j])
}
ab_joined_list[[j]] <- bind_rows(ab_marginal_effects)
}
all_me_data <- bind_rows(ab_joined_list) %>%
mutate(param = case_when(param == "BareSoil" ~ "Bare Soil (%)",
param == "Nitrogen" ~ "Nitrogen (%)",
param == "PastureDistance" ~ "Distance to Pastural Land (m)",
param == "TreeDensity" ~ "Tree Density (%)",
param == "ForestEdge" ~ 'Amount of Forest Edge per km2 (m)',
param == "dist_coast" ~ "Geographical Distance to Coast"))
marginal_effects_plot_cmd_all(all_me_data,
col = "DarkGreen",
factor = FALSE,
ylab = "Contribution to Abundance (log-scale)") +
ggplot2::facet_grid(species~param, scales = "free") +
theme_bw() +
xlab("Covariate Value")
Figure 9: Marginal response curves for the various fixed-effect parameters used in the model.